/* 01_datacleaning */

* Load the initial SPES data (path is built from the $usedata_analysis global)
use "$usedata_analysis/01_raw/spes_data_init", clear

* The old stratification cell is dropped
drop scell

* Shorten long _bb_ variable names together with their _f_bb_ flag
* counterparts (helps with overly long variable/flag names)
local bb_long  workinformal lowestamount expectedtuition othereducationexp
local bb_short worki la expt othexp
forvalues k = 1/4 {
	local from : word `k' of `bb_long'
	local to   : word `k' of `bb_short'
	rename _bb_`from' _bb_`to'
	rename _f_bb_`from' _f_bb_`to'
}

* For each variable: create a flag equal to 1 where the value is
* system-missing, then set those system-missing values to zero
foreach stub in NT_edu eeo_faminc {
	gen _f_`stub' = (_`stub' == .)
	replace _`stub' = 0 if _`stub' == .
}


* Summarize the summer-work outcome variables already in the data
sum _eeo_worksummer*

* Indicators for OJT and unpaid summer work; set to missing when endline == 0
foreach kind in ojt unpaid {
	gen _eeo_worksummer_`kind' = (work_summer_`kind' == 1)
	replace _eeo_worksummer_`kind' = . if endline == 0
}

* Total hours per week / earnings: for endline == 1 observations,
* system-missing values are set to zero (unpaid work has no earnings variable)
foreach src in formal informal unpaid {
	replace work_summer_`src'_hrs_wk = 0 if endline == 1 & work_summer_`src'_hrs_wk == .
}
foreach src in formal informal {
	replace work_summer_`src'_earned = 0 if endline == 1 & work_summer_`src'_earned == .
}

* Recode outlier: a value of exactly 500 formal hours per week becomes missing
replace work_summer_formal_hrs_wk = . if work_summer_formal_hrs_wk == 500

* Totals across work types (missing if any component is missing)
gen _eeo_worksummer_tothrs = work_summer_formal_hrs_wk + work_summer_informal_hrs_wk + work_summer_unpaid_hrs_wk
gen _eeo_worksummer_totearn = work_summer_formal_earned + work_summer_informal_earned

* SPES earnings (total): zero when tr == 0; stays missing when tr == 1
* and SPES earnings are missing
replace total = 0 if tr == 0
gen _eeo_worksummer_earn_all = work_summer_formal_earned + work_summer_informal_earned + total
sum _eeo_worksummer_totearn _eeo_worksummer_earn_all

* Conditional (non-zero) versions of total hours and total earnings:
* copies in which zeros are turned into missing
foreach measure in tothrs totearn {
	clonevar _eeo_worksummer_`measure'_nz = _eeo_worksummer_`measure'
	replace _eeo_worksummer_`measure'_nz = . if _eeo_worksummer_`measure'_nz == 0
}

* Any summer work or SPES in 2016; missing when _eeo_worksummer_any is missing
gen _eeo_worksummer_any_spes = (_eeo_worksummer_any == 1 | spes_2016 == 1) if !missing(_eeo_worksummer_any)


* Hours per week in the current job
** Fairly high hours
gen _eeo_workhours = work_now_hrs_wk

* Employment x enrollment categories, created in the order
* emped, empnoed, noemped, noempnoed
foreach w in 1 0 {
	local wtag = cond(`w' == 1, "emp", "noemp")
	foreach e in 1 0 {
		local etag = cond(`e' == 1, "ed", "noed")
		gen _eeo_`wtag'`etag' = (_eeo_worknow == `w') & (_eeo_enroll == `e')
	}
}
/*
* Public vs private
gen _eeo_worknow_priv = work_now_type == "1" 
gen _eeo_worknow_pubn = work_now_type == "2" | work_now_type == "3" 
*/

*****		Found job through SPES	*****

* 1 when currently working in a job flagged work_now_spes == 1, or when
* _work_now_search_find == 6; then forced to 0 for observations with
* spes_2016 == 0 and spes_before == 0
gen _eeo_worknow_spes = (_eeo_worknow == 1 & work_now_spes == 1) | _work_now_search_find == 6
replace _eeo_worknow_spes = 0 if spes_2016 == 0 & spes_before == 0

* Currently working, and the job is not flagged as a SPES job
gen _eeo_worknow_nospes = (_eeo_worknow == 1) & (_eeo_worknow_spes == 0)

*** Make wages and hours unconditional: system-missing becomes zero
foreach v of varlist _eeo_wage_monthly_now _eeo_workhours {
	replace `v' = 0 if `v' == .
}

* Check variables 
*sum _eeo_em* _eeo_noem* _eeo_enroll _eeo_workn


* Academic variable: 1 when edu_hs_track1 == 1, 0 otherwise,
* missing when edu_hs_track1 is system-missing
gen _eeo_at = (edu_hs_track1 == 1) if edu_hs_track1 != .

** Shorten variable names (listed as "old new" pairs)
local shorten ///
    _eeo_gwa_norm _eeo_gwan ///
    _eeo_enroll_nextyr _eeo_enr_ny ///
    _eeo_wage_monthly_now _eeo_wage_mn ///
    _eeo_edu_job_6mo_high _eeo_job_6mo_ ///
    _eeo_edu_job_wage_lowest _eeo_wage_low ///
    _eeo_edu_job_wage_expect _eeo_wage_exp ///
    _eeo_edu_expect_highest_college _eeo_exp_hcol ///
    _eeo_hardtasks_index _eeo_tasksi ///
    _eeo_selfesteem_index _eeo_selfi ///
    _eeo_lifeskills_index _eeo_lifei
while "`shorten'" != "" {
	gettoken longname shorten : shorten
	gettoken shortname shorten : shorten
	rename `longname' `shortname'
}

* Job-search variables: jobsearch_<x> becomes js<x>
local jtypes "cv online peso fair walkin fam_refer official_refer"
foreach channel of local jtypes {
	rename jobsearch_`channel' js`channel'
}
* The official-referral variable gets an even shorter name
rename jsofficial_refer jsoff




* High-income indicator derived from the PPI score (_score_ppi_total).
* NOTE(review): _eeo_faminc_muni is created as a constant 0 and is not
* modified or used again in this file - confirm whether it is still needed.
gen _eeo_faminc_muni = 0
qui sum _score_ppi_total, detail
* NOTE(review): only the value 1 is assigned here; observations at or below
* the median keep whatever value _eeo_faminc already had - confirm intended.
replace _eeo_faminc = 1 if _score_ppi_total > r(p50)
replace _eeo_faminc = . if _score_ppi_total >= . 


rename _eeo_faminc _eeo_fi

* Flag system-missing values of the renamed variable, then set them to zero
gen _f_eeo_fi = _eeo_fi == . 
recode _eeo_fi . = 0


* Fix: the variable was renamed to _eeo_fi above, so the label must use the
* new name. The old name no longer exists; with variable abbreviation on it
* would resolve to _eeo_faminc_muni and label the wrong variable.
label var _eeo_fi "[EEO] R PPI score in top 50 percentile"

** Work experience: short-named copies of _bb_workany and its flag
foreach pre in _bb _f_bb {
	clonevar `pre'_wa = `pre'_workany
}
sum _bb_wa _f_bb_wa

// shorten names of the jobsearch_affect_* variables ("old-suffix new-suffix")
foreach pair in "discriminate dis" "applyprocess app" "fewcontact con" {
	local from : word 1 of `pair'
	local to : word 2 of `pair'
	rename jobsearch_affect_`from' jobsearch_affect_`to'
}



****************
*		CLEAN OCCUPATIONS AND SORT 
****************


/* Key set of characteristics */ 


** Code type of jobs 



* Tabulate the raw free-text job titles before any cleaning
tab work_now_position

* Upper-cased, trimmed copy of the title; every rule below matches against this copy
gen _eeo_work_now_position = trim(upper(work_now_position))

* Numeric occupation code; starts out missing and is filled in rule by rule.
* Rules guarded with "& _eeo_work_now_positioncode == ." only touch titles that
* are still uncoded, so the order of the rules determines the final code.
* NOTE(review): the pattern function is written regex(); the documented Stata
* name is regexm() - confirm regex() is accepted by the Stata version in use.
gen _eeo_work_now_positioncode = .



/* 1 = Food service

Waitress/waitstaff, cook, food server, service crew, kitchen staff, works at canteen

Service Crew (25)
Waitress (6)
Cook (2)
*/ 

* Titles containing WAIT, SERVICE CREW or COOK (no guard needed: nothing is coded yet)
replace _eeo_work_now_positioncode = 1 if regex(_eeo_work_now_position,"WAIT") | regex(_eeo_work_now_position,"SERVICE CREW") | regex(_eeo_work_now_position,"COOK")
// Note one service crew & cashier coded as service crew. One room attendant/cook coded as cook
replace _eeo_work_now_positioncode = 1 if (regex(_eeo_work_now_position,"FOOD SERVER") | regex(_eeo_work_now_position,"BAKE SHOP") | regex(_eeo_work_now_position,"KITCHEN STAFF")) & _eeo_work_now_positioncode == .
* Exact-title matches
replace _eeo_work_now_positioncode = 1 if  _eeo_work_now_position == "WORKS AT A CANTEEN" | _eeo_work_now_position == "SERVICE AND PRODUCTION CREW"

replace _eeo_work_now_positioncode = 1 if (regex(_eeo_work_now_position,"SERVE CREW") | regex(_eeo_work_now_position,"DINING CREW") ) & _eeo_work_now_positioncode == .


/* 2 = Customer/Client service and sales

Sales person, cashier

Cashier (11)
Sales Lady/Saleslady (8)
Sales associate/sales boy (4)


 */ 
* Pattern rule: any still-uncoded title containing SALE or CASHIER
replace _eeo_work_now_positioncode = 2 if (regex(_eeo_work_now_position,"SALE") | regex(_eeo_work_now_position,"CASHIER")) & _eeo_work_now_positioncode == .
* Exact-title rules; the misspellings inside the quoted strings are the values being matched
replace _eeo_work_now_positioncode = 2 if _eeo_work_now_position == "CALL CENTER AGENT" | _eeo_work_now_position == "ASSISSTING THE TAX PAYERS" | _eeo_work_now_position == "COSTUMER RELATION OFFICER"
replace _eeo_work_now_positioncode = 2 if _eeo_work_now_position == "COSTUMER SERVICE REPRESENTATIVE" | _eeo_work_now_position == "CREDIT ASSISTANT" | _eeo_work_now_position == "FRONT DESK"
replace _eeo_work_now_positioncode = 2 if _eeo_work_now_position == "FRONT DESK PERSONNEL" | _eeo_work_now_position == "LOCAL STORE MARKETING" | _eeo_work_now_position == "DISER"
replace _eeo_work_now_positioncode = 2 if _eeo_work_now_position == "MARKETING STAFF" | _eeo_work_now_position == "MARKETING OPERATION SECTION" | _eeo_work_now_position == "TOUR GUIDE" | _eeo_work_now_position == "INTERVIEWER"

/* 3 =  Encoder
Encoder/Encoding (4) */ 
* Still-uncoded titles containing ENCOD, plus two exact titles
replace _eeo_work_now_positioncode = 3 if regex(_eeo_work_now_position,"ENCOD") & _eeo_work_now_positioncode == .
replace _eeo_work_now_positioncode = 3 if _eeo_work_now_position == "ENUMERATOR" | _eeo_work_now_position == "MANAGEMEMT INFORMATION SYSTEM RECORDER"

/* 4 = Caregiver/babysitter */ 
* Still-uncoded titles containing "BABY S", or CARE followed anywhere later by GIVE
replace _eeo_work_now_positioncode = 4 if (regex(_eeo_work_now_position,"BABY S") | regex(_eeo_work_now_position,"CARE.*GIVE")) & _eeo_work_now_positioncode == .


/* 5 = Office assistant/clerical 
Office worker, library clerk, stock clerk  

Secretary (3)

*/ 


* Pattern rules: still-uncoded titles containing "OFFICE " (with trailing space) or LIBRARY
replace _eeo_work_now_positioncode = 5 if regex(_eeo_work_now_position,"OFFICE ") & _eeo_work_now_positioncode == .
replace _eeo_work_now_positioncode = 5 if regex(_eeo_work_now_position,"LIBRARY") & _eeo_work_now_positioncode == .
* Exact-title rules (unguarded); misspellings in the quoted strings are the values being matched
replace _eeo_work_now_positioncode = 5 if _eeo_work_now_position == "SECRETARY" 						| _eeo_work_now_position == "CLERICAL JOB" 				| _eeo_work_now_position == "HR ASSISTANT" 			
replace _eeo_work_now_positioncode = 5 if _eeo_work_now_position == "ASSISTANT ADMINISTRATION" 			| _eeo_work_now_position == "DEAN'S ASSISTANT" 			| _eeo_work_now_position == "INFORMATION OFFICER" 	
replace _eeo_work_now_positioncode = 5 if _eeo_work_now_position == "ASSISTANT IN THE BARANGAY (SPES)" 	| _eeo_work_now_position == "PERSONAL AID" 			| _eeo_work_now_position == "LEGESLATIVE STAFF" 	
replace _eeo_work_now_positioncode = 5 if regex(_eeo_work_now_position,"CLERK") & _eeo_work_now_positioncode == .   // includes stock clerk 

* Unfinished rule, left disabled
*replace _eeo_work_now_positioncode = 5 if _eeo_work_now_position

/* 6 = Cleaning/utility/general laborer

* Cleaner (4) 
* Housekeeper (2) - there is one "house keeper, food server" - coded as food server 

Laborer (3) 
Construction worker (2) 
Gasoline boy (2)
Sales Utility Clerk (1) 
Utility worker (1)
All around helper (1)
Maintenance (2) */ 

* Show the still-uncoded titles that the two pattern rules below are aimed at
* (the housekeeper listing uses the slightly shorter pattern HOUSE.*KEE)
list _eeo_work_now_position if regex(_eeo_work_now_position,"CLEANER")& _eeo_work_now_positioncode == .
list _eeo_work_now_position if regex(_eeo_work_now_position,"HOUSE.*KEE") & _eeo_work_now_positioncode == .

replace _eeo_work_now_positioncode = 6 if regex(_eeo_work_now_position,"CLEANER") & _eeo_work_now_positioncode == .
replace _eeo_work_now_positioncode = 6 if regex(_eeo_work_now_position,"HOUSE.*KEEP") & _eeo_work_now_positioncode == .

* Exact-title rules (unguarded)
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "LABORER" 						| _eeo_work_now_position == "CONSTRUCTION WORKER" 	| _eeo_work_now_position == "ROOMBOY"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "CONSTRUCTION WORKER(HELPER)" 	| _eeo_work_now_position == "CONTRACTION WORKER" 	| _eeo_work_now_position == "GASOLINE BOY"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "ALL AROUND HELPER" 			| _eeo_work_now_position == "MAINTENANCE" 			| _eeo_work_now_position == "DELIVERING OF WATER"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "HOUSE BOY/CARE TAKER" 		| _eeo_work_now_position == "HELPER" 				| _eeo_work_now_position == "HARVESTING"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "HANDLER OF FARM" 				| _eeo_work_now_position == "GARDENER" 				| _eeo_work_now_position == "UMBRELLA GIRL"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "MEESENGER" 					| _eeo_work_now_position == "GASOLINE PUMP BOY" 	| _eeo_work_now_position == "GUEST ATTENDANT"
 replace _eeo_work_now_positioncode = 6 if _eeo_work_now_position == "TRICYCLE DRIVER" 				| _eeo_work_now_position == "COLLECTOR" 			| _eeo_work_now_position == "SECURITY GUARD"
* Guarded pattern rule: a title containing UTILITY that an earlier rule in this
* file already coded (for example one that also contains SALE) keeps its earlier code
replace _eeo_work_now_positioncode = 6 if regex(_eeo_work_now_position,"UTILITY") & _eeo_work_now_positioncode == .


/* 7 = Vendor/Sales
vendor, fish vendor, online sales
*/ 

* Exact-title rules (unguarded, so they overwrite any code assigned earlier)
replace _eeo_work_now_positioncode = 7 if _eeo_work_now_position == "VENDOR" | _eeo_work_now_position == "FISH VENDOR" | _eeo_work_now_position == "ONLINE DEALER/RESELLER" | _eeo_work_now_position == "ONLINE DEALER (BUSINESS OWNER)" 
replace _eeo_work_now_positioncode = 7 if _eeo_work_now_position == "DEALER" | _eeo_work_now_position == "DEALER/AGENT" | _eeo_work_now_position == "TINDERO" 


/* 8 = Production workers
Production Operator (3)  
Machine operator (2)
Operator (2) */
* Exact-title rules (unguarded)
replace _eeo_work_now_positioncode = 8 if _eeo_work_now_position == "PRODUCTION OPERATOR" 	| _eeo_work_now_position == "MACHINE OPERATOR" 	| _eeo_work_now_position == "OPERATOR"
replace _eeo_work_now_positioncode = 8 if _eeo_work_now_position == "PRODUCTION CREW" 		| _eeo_work_now_position == "REPACKER" 			| _eeo_work_now_position == "PACKER" 	| _eeo_work_now_position ==  "FACTORY WORKER"
replace _eeo_work_now_positioncode = 8 if _eeo_work_now_position == "SORTER" 		| _eeo_work_now_position == "STOCK CONTROLLER" 			| _eeo_work_now_position == "XEROX MACHINE OPERATOR" 	
 
/* 9 = Health/education 
Teacher, tutor, test proctor, pharmacy/dental/clinic assistants
*/ 
* Exact-title rules (unguarded)
replace _eeo_work_now_positioncode = 9 if _eeo_work_now_position == "TEACHER" 				| _eeo_work_now_position == "TUTOR" 		| _eeo_work_now_position == "PROCTOR (TOAIC)" 	| _eeo_work_now_position == "ENGLISH LANGUAGE TUTOR"
replace _eeo_work_now_positioncode = 9 if _eeo_work_now_position == "ASSISTANT PHARMACY" 	| _eeo_work_now_position == "PHARMACY AIDE" | _eeo_work_now_position == "DENTIST ASSISTANT" | _eeo_work_now_position == "ASSISTANT IN CLINIC"

/* 10 = Misc. Assistant /Unspecified

Assistant, general assistant, virtual assistant, student assistant 
Student assistant (6)
STUDENT AIDE (2) 
ASSITANT (1)
*/ 

* NOTE(review): this is an exact comparison with the string "ASSIST", not a
* pattern match, so titles such as "GENERAL ASSISTANT" are not caught here -
* confirm whether a pattern match on ASSIST was intended.
replace _eeo_work_now_positioncode = 10 if (_eeo_work_now_position == "ASSIST" | _eeo_work_now_position == "STUDENT ASSISTANT")  & _eeo_work_now_positioncode == .
replace _eeo_work_now_positioncode = 10 if _eeo_work_now_position == "VIRTUAL ASSISTANT" | _eeo_work_now_position == "STUDENT AIDE" | _eeo_work_now_position == "ASSITANT" | _eeo_work_now_position == "ASSISTANT"
* "ASSITANT" and "ASSISTANT" are repeated from the previous rule (no effect)
replace _eeo_work_now_positioncode = 10 if _eeo_work_now_position == "CHECKERS" | _eeo_work_now_position == "CREW" | _eeo_work_now_position == "ASSITANT" | _eeo_work_now_position == "ASSISTANT"
replace _eeo_work_now_positioncode = 10 if _eeo_work_now_position == "STAFF" | _eeo_work_now_position == "TRAINEE" | _eeo_work_now_position == "AGENT"

/* 11 = Skilled technical

3D VISUALIZER AND CAD OPERATOR (1) 
Computer/tech (4) 
 */ 
* Show, then code, the still-uncoded titles containing TECHNICAL or COMPUTER
list _eeo_work_now_position if (regex(_eeo_work_now_position,"TECHNICAL") | regex(_eeo_work_now_position,"COMPUTER"))   & _eeo_work_now_positioncode == .
replace _eeo_work_now_positioncode = 11 if (regex(_eeo_work_now_position,"TECHNICAL") 	| regex(_eeo_work_now_position,"COMPUTER"))  & _eeo_work_now_positioncode == .
* Exact-title rules (unguarded); misspellings in the quoted strings are the values being matched
replace _eeo_work_now_positioncode = 11 if _eeo_work_now_position == "3D VISUALIZER AND CAD OPERATOR" 	| _eeo_work_now_position == "AUTOMECAHNICS" 	| _eeo_work_now_position == "ELECTRICIAN"
replace _eeo_work_now_positioncode = 11 if _eeo_work_now_position == "AUTOCAD OPERATOR" 				| _eeo_work_now_position == "PLUMBER" 			| _eeo_work_now_position == "SHOP MECHANIC"
replace _eeo_work_now_positioncode = 11 if _eeo_work_now_position == "SHIFTING ENGINEER" 				| _eeo_work_now_position == "MEDICAL TECHNOLOGIST"

/* 12 = Other */ 
* Exact-title rules (unguarded)
replace _eeo_work_now_positioncode = 12 if _eeo_work_now_position == "DJ ON THE RADIO" | _eeo_work_now_position == "MANAGER" | _eeo_work_now_position == "ASSISTANT OF MAKE UP ARTIST"
replace _eeo_work_now_positioncode = 12 if _eeo_work_now_position == "LOGESTICS DEPARTMENT" 




* Value labels for the occupation codes (office assistant includes library)
label define position ///
	1 "Food service" ///
	2 "Sales" ///
	3 "Encoder" ///
	4 "Caregiver/Babysitter" ///
	5 "Office Assistant/Clerical" ///
	6 "Cleaning/Laborer" ///
	7 "Vendor/Sales" ///
	8 "Production Worker" ///
	9 "Education/Health" ///
	10 "Misc./Unspecified Assistant" ///
	11 "Skilled/technical" ///
	12 "Other"
label values _eeo_work_now_positioncode position

* Titles that are still uncoded, most frequent first
tabulate _eeo_work_now_position if _eeo_work_now_positioncode == ., sort

* Distribution of codes (including missing) among observations with a title
tabulate _eeo_work_now_positioncode if !missing(_eeo_work_now_position), sort missing


* One 0/1 indicator per occupation code value 1 through 13,
* missing whenever the occupation code itself is missing
forvalues code = 1/13 {
	gen _eeo_position`code' = (_eeo_work_now_positioncode == `code') if !missing(_eeo_work_now_positioncode)
}

* Approximate "skilled" indicator: starts at 0 for those currently working,
* set to 1 for occupation codes 1, 2, 3, 5, 8, 9, 11 and 12
gen _eeo_aprox_skilled = 0 if _eeo_worknow == 1
replace _eeo_aprox_skilled = 1 if inlist(_eeo_work_now_positioncode, 1, 2, 3, 5, 8, 9, 11, 12)


*****		Work type	*****


* Employer type from the string variable work_now_type:
* "1" is flagged private; "2" or "3" is flagged in _eeo_worknow_pubn
gen _eeo_worknow_private = (work_now_type == "1")
gen _eeo_worknow_pubn = inlist(work_now_type, "2", "3")

* Free-text "other" answers that are counted as private
* (2 listed as self-employed, call private)
foreach txt in "relatives" "pampamilyang negosyo" "food chain" "pribadong bahay" "self employed." "self employment" "private person (uncle)" {
	replace _eeo_worknow_private = 1 if work_now_type_other == "`txt'"
}

* Free-text "other" answers that are counted in _eeo_worknow_pubn
foreach txt in "public organization" "school" {
	replace _eeo_worknow_pubn = 1 if work_now_type_other == "`txt'"
}

* An "other" answer of "-99" makes both indicators missing
foreach v of varlist _eeo_worknow_private _eeo_worknow_pubn {
	replace `v' = . if work_now_type_other == "-99"
}

* Remaining "other" answers that fall in neither group
tabulate work_now_type_other if _eeo_worknow_private == 0 & _eeo_worknow_pubn == 0


*****		Regular employees	*****

* Worker type among those with work_now == 1 (missing values shown)
tab work_now_worker_type if work_now == 1,mi

* Regular-employee indicator: 1 when work_now_worker_type == 1,
* missing when the worker type is missing
gen _eeo_worker_reg = work_now_worker_type == 1
	replace _eeo_worker_reg = . if missing(work_now_worker_type)

*****		OJT/internship	*****

* OJT indicator: 1 when wok_now_ojt == 1 (the source variable is spelled
* "wok_now_ojt" in this file)
gen _eeo_ojt = wok_now_ojt == 1
	* Fix: blank out the derived indicator when the source is missing. The
	* previous line replaced wok_now_ojt with missing where it was already
	* missing (a no-op), leaving _eeo_ojt = 0 for those observations.
	replace _eeo_ojt = . if missing(wok_now_ojt)
	

***********

*****		Months employed at job *****


		* Endline survey date: survey_date is matched as text against the
		* pattern "2017-MM-DD"; month and day are captured as strings.
		* Values that do not contain a 2017 date stay missing.

		sum survey_date 

		gen _survmo = regexs(1) if regexm(survey_date,"2017-([0-9][0-9])-([0-9][0-9])")
		gen _survday = regexs(2) if regexm(survey_date,"2017-([0-9][0-9])-([0-9][0-9])")

		destring _survmo,replace
		destring _survday,replace 

		* Build a Stata daily date. The day variable is written out in full:
		* the abbreviation _survd matches both _survday and _survdate as soon
		* as _survdate exists, so it only worked by accident of ordering.
		gen _survdate = mdy(_survmo, _survday, 2017)	
		format _survdate %td
	
* Tenure in months. The date difference is in days (work_now_start is compared
* with td() dates elsewhere in this file, so it is treated as a daily date),
* so divide by the average month length 365.25/12 rather than by 12.
gen _eeo_months_emp = (_survdate - work_now_start)/(365.25/12)




*****		How found work, how long at job  *****

* Distribution of the job-finding variable, most frequent first
tabulate _work_now_search_find, sort

* Indicators for job-finding codes 4 (_eeo_found_ref) and 1 (_eeo_found_direct);
* missing when _work_now_search_find is missing
gen _eeo_found_ref = (_work_now_search_find == 4) if !missing(_work_now_search_find)
gen _eeo_found_direct = (_work_now_search_find == 1) if !missing(_work_now_search_find)

* Job started on or after 01jun2016; missing when the start date is missing
gen _eeo_worknow_new = (work_now_start >= td(01jun2016)) if !missing(work_now_start)


*****		Salaried (vs. daily/stipend) *****

* 1 when work_now_wage_type == 2; missing when the wage type is missing
gen _eeo_salaried = (work_now_wage_type == 2) if !missing(work_now_wage_type)
	
 
 
** Make missing if no endline
* Full variable names are used here (previously _eeo_worknow_priv and
* _eeo_worknow_pub) so the loop does not depend on variable-name abbreviation,
* which fails under "set varabbrev off" or once another variable shares the prefix.
foreach var in _eeo_worknow_private _eeo_worknow_pubn _eeo_worknow_spes _eeo_worknow_nospes{
replace `var' = . if endline == 0 
}

** Make missing if not in study 
* The four employment x enrollment indicators are blanked when randomization == 0

foreach var in emped empnoed noemped noempnoed{

replace _eeo_`var' = . if randomization == 0
}

** Label variables 

* Label text held in local macros; not referenced again in this file
local l_bb_workformal "Formal Experience"
local l_eeo_fi "High Income"
local l_bb_wa "Any Experience"


label var spes_2016 "Enrolled in SPES"
label var tr "Enrolled in SPES (admin)"
label var treatment "Invited to SPES"

* Write the cleaned analysis file, overwriting any existing copy
save "$usedata_analysis/spes_data",replace
